In this brief example, I show how to use the GPS coordinates from the Demographic and Health Survey data and merge them to the ADM2 subnational geographic level for the country of Ethopia. Then I produce estimates using the DHS data for ADM 2 regions of the country.
This is possible by useing the GIS capacity of the sf package to spatially intersect the DHS points and the ADM 2 polygons.
library(sf)
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(mapview)
ethpoly <- st_read(dsn = "~/OneDrive - University of Texas at San Antonio//students/fikre/spatial_epi/ETH_adm2.shp")
## Reading layer `ETH_adm2' from data source `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\spatial_epi\ETH_adm2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 72 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 32.99994 ymin: 3.322099 xmax: 47.98618 ymax: 14.89996
## Geodetic CRS: WGS 84
ethpoly$struct <- 1:dim(ethpoly)[1]
plot(ethpoly["struct"])
The adm2 shapefile can be found in the Diva GIS international data repository, or from the IPUMS International site below I use the ADM2 level of administrative geography.
These locations are not identified in the DHS, but by performing a spatial intersection, we can merge the DHS survey locations to the ADM 2 units
eth_dots<-st_read("~/OneDrive - University of Texas at San Antonio//students//fikre/ethiopia_dhs/ETGE52FL/ETGE52FL.shp")
## Reading layer `ETGE52FL' from data source `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\ethiopia_dhs\ETGE52FL\ETGE52FL.shp' using driver `ESRI Shapefile'
## Simple feature collection with 535 features and 20 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 0 ymin: 0 xmax: 43.36409 ymax: 14.50379
## Geodetic CRS: WGS 84
eth_dots <- eth_dots[eth_dots$LATNUM>0,]
eth_adm2<-st_read("~/OneDrive - University of Texas at San Antonio//students//fikre/spatial_epi/ETH_adm2.shp")
## Reading layer `ETH_adm2' from data source `C:\Users\ozd504\OneDrive - University of Texas at San Antonio\students\fikre\spatial_epi\ETH_adm2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 72 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 32.99994 ymin: 3.322099 xmax: 47.98618 ymax: 14.89996
## Geodetic CRS: WGS 84
#merge dots to administrative data
eth_dots2000<-st_intersection(eth_dots, eth_adm2)
## although coordinates are longitude/latitude, st_intersection assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
mapview(eth_dots["DHSCLUST"])+mapview(eth_adm2["NAME_2"])
Next, I use the DHS survey data to estimate the prevalence of stunting in the ADM 2 regions.
library(haven)
dhs2000<-read_dta("~/OneDrive - University of Texas at San Antonio//students//fikre/ethiopia_dhs/ETKR41DT/ETKR41FL.DTA")
dhs2000<-zap_labels(dhs2000)
library(car)
## Loading required package: carData
dhs2000$stunting<-ifelse(dhs2000$hw5/100<=-2&dhs2000$hw5/100!=-2,1,0)
#dhs2000$sex<-dhs2000$hc27
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
dhs2000<-dhs2000%>%
mutate(wt = v005/1000000)%>%
filter(complete.cases(stunting))%>%
select(v001,stunting, wt, v021, v022)
dhs2000m<-merge(dhs2000, eth_dots2000, by.x="v001", by.y="DHSCLUST")
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
options(survey.lonely.psu = "adjust")
des<-svydesign(ids = ~v021, strata = ~v022, weights = ~wt, data=dhs2000m)
names(dhs2000m)
## [1] "v001" "stunting" "wt" "v021" "v022"
## [6] "DHSID" "DHSCC" "DHSYEAR" "CCFIPS" "ADM1FIPS"
## [11] "ADM1FIPSNA" "ADM1SALBNA" "ADM1SALBCO" "ADM1DHS" "ADM1NAME"
## [16] "DHSREGCO" "DHSREGNA" "SOURCE" "URBAN_RURA" "LATNUM"
## [21] "LONGNUM" "ALT_GPS" "ALT_DEM" "DATUM" "ID_0"
## [26] "ISO" "NAME_0" "ID_1" "NAME_1" "ID_2"
## [31] "NAME_2" "TYPE_2" "ENGTYPE_2" "NL_NAME_2" "VARNAME_2"
## [36] "geometry"
est.stunt <- svyby(~stunting, ~ID_2, des, FUN=svymean, na.rm=T)
head(est.stunt)
## ID_2 stunting se
## 1 1 0.4236812 0.02842176
## 3 3 0.4964883 0.07773643
## 4 4 0.5227321 0.10306007
## 5 5 0.5469683 0.04175304
## 6 6 0.3879567 0.10738629
## 7 7 0.6280925 0.02028507
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(mapview)
mdat<- geo_join(ethpoly, est.stunt, "ID_2","ID_2")
## Warning: `group_by_()` was deprecated in dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
mapview(mdat["stunting"])